#####################################################################################
####Name of File: National_outcome_reg.R
####Input dataset: "Unrestricted.Rdata", "Restricted.Rdata"
####Output : matching coefficients
####Date: 4/24/2011
####Purpose: Matching and logit model for ANC, IFB, SBA, NNM, PNM
#####################################################################################

###load data
load("N:/Repdata/Table1/Restricted.Rdata")
load("N:/Repdata/Table1/Unrestricted.Rdata")

###load libraries
library(xtable)
library(Zelig)
library(tcltk)
library(cem)
library(MatchIt)


##########prepare dataset for matching (create groups on which to match similar to author's way)

set.seed(01238)
summary(Unrestricted)  #zero NAs in dataset so can go ahead with matching
dim(Unrestricted)  #57850

Unrestricted$wealthgroup<-ifelse(Unrestricted$dlhs3_country_quintile<=2,1,
						ifelse(Unrestricted$dlhs3_country_quintile==3, 2,3))

Unrestricted$castegroup<-ifelse(Unrestricted$caste<=2,1,
						ifelse(Unrestricted$caste==3| Unrestricted$caste==4, 2,99))
						
Unrestricted$educgroup<-ifelse(Unrestricted$mateduc_cat<=1,1,
				ifelse(Unrestricted$mateduc_cat==2| Unrestricted$mateduc_cat==3, 2,99))

Unrestricted$agegroup<-ifelse(Unrestricted$matage_num<=2,1,2)

Unrestricted$nbgroup<-ifelse(Unrestricted$nb_cat_num==1,1,
				ifelse(Unrestricted$nb_cat_num==2,2,
				ifelse(Unrestricted$nb_cat_num==3| Unrestricted$nb_cat_num==4,3,99)))


#########################match with state fixed effects
countries.all<-sort((unique(Unrestricted$state)))
all.data<-matrix(list(),nrow=length(countries.all),ncol=1)

for (i in 1:length(countries.all)){

	x<-countries.all[i]
	print(x)
	z<-Unrestricted[Unrestricted$state==x,]
	
	match.output<-matchit(jsy~bpl_card + urban + as.factor(wealthgroup)+as.factor(castegroup)+ as.factor(educgroup)+as.factor(nbgroup)+as.factor(agegroup), data=z, method="exact")
	
	a<-summary(match.output)
	print(a$nn)
	all.data[[i,1]]<-match.data(match.output, weights="weights")

}

########append all datasets together
all.data2<-all.data
full.data<-as.data.frame(all.data2[1,1])
for (i in 2:34){
	full.data<-rbind(full.data,as.data.frame(all.data2[i,1]))
}
dim(full.data)  #163947

##################check balance pre and post matching
unmatched.data<-subset(Unrestricted, select=c(jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,
dlhs3_country_decile,caste, religion2,res_cat, DIST_mean_hh_p_income3,state))

balance1 <- imbalance(group= unmatched.data$jsy, data=unmatched.data, drop=c("jsy"))
balance1 
###pre-match balance: L1=.869, LCS=6.3%

matched.data<-subset(full.data, select=c(jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,
dlhs3_country_decile,caste, religion2,res_cat, DIST_mean_hh_p_income3,state))

balance2 <- imbalance(group= matched.data$jsy, data=matched.data, drop=c("jsy"))
balance2
###L1=.859, LCS=6.9%


summary(full.data)


###put variables back to missing that were 99 
full.data$res_cat<-ifelse(full.data$res_cat==99, NA,  full.data$res_cat)
full.data$mult_birth<-ifelse(full.data$mult_birth==99,NA, full.data$mult_birth)
full.data$mateduc_cat<-ifelse(full.data$mateduc_cat==99,NA, full.data$mateduc_cat)




#############################prepare dataset to go around the bugs for logit

full.data2<-full.data
full.data2$nb_cat<-as.factor(full.data$nb_cat_num)
full.data2$mult_birth<-as.factor(full.data$mult_birth)
full.data2$mateduc_cat<-as.factor(full.data$mateduc_cat)
full.data2$dlhs3_country_decile<-as.factor(full.data$dlhs3_country_decile)

full.data2$res_cat<-as.factor(full.data$res_cat)

################################################################################################################################################
##################################################ANC outcome########################################################################

full.data3<-subset(full.data2, select=c(anc3,jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,dlhs3_country_decile,
caste,religion2,res_cat, DIST_mean_hh_p_income3,state,weights))


#take out goa and uttar pradesh since they dont have those for anc
full.data3<-full.data3[full.data3$state!="Goa",]
full.data3<-full.data3[full.data3$state!="Chandigarh",]

full.data3$caste1<-as.factor(full.data3$caste)
full.data3$state1<-as.factor(full.data3$state)

##change factor levels to be like theirs
full.data3<- within(full.data3, matage_cat <- relevel(matage_cat, ref = "30-34"))
full.data3<- within(full.data3, birth_interval <- relevel(birth_interval, ref = "24<= Months"))
full.data3<- within(full.data3, caste1 <- relevel(caste1, ref = 4))
full.data3<- within(full.data3, state1 <- relevel(state1, ref = "Uttar Pradesh"))


anc.logit<-zelig(anc3~jsy+matage_cat+nb_cat+birth_interval+mult_birth+mateduc_cat+dlhs3_country_decile+caste1+ religion2+res_cat+ DIST_mean_hh_p_income3+state1, model="logit", 
weights="weights", data=full.data3, save.data=T)

summary(anc.logit)

##get the OR for the coefficients

anc.mean<-round(exp(coef(anc.logit)),2)
as.matrix(anc.mean)



#################To PUT INTO LATEX TABLE----TO DO LATER
##get OR estimates and standard errors for odds ratios
estmean<-round(exp(coef(anc.logit)),2)
se<-summary(anc.logit)$coefficients[,2]
se2<-round(exp(coef(anc.logit))*se,2)

ANC.mean.se<-paste(estmean,"(" ,se2, ")",sep="")

###get the p-values
pvalue.logit<-coefficients(summary(anc.logit))[,4]
pvalue.logit<-round(pvalue.logit,4)
pvalue.ANC.final<-ifelse(pvalue.logit<0.0001, "<0.0001", pvalue.logit)



##TABLE 3 estimates
x.treat <- setx(anc.logit, jsy=1)
x.control <- setx(anc.logit, jsy = 0)
s.out <- sim(anc.logit, x = x.control, x1 = x.treat, weights=~dwu)
summary(s.out)

anc.trteffect<-summary(s.out)$qi.stats$fd*100

################################################################################################################################################
##################################################IFB outcome########################################################################


full.data3<-subset(full.data2, select=c(ifb,jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,dlhs3_country_decile,
caste,religion2,res_cat, DIST_mean_hh_p_income3,state,weights))


#take out goa and uttar pradesh since they dont have those for anc
full.data3<-full.data3[full.data3$state!="Goa",]
full.data3<-full.data3[full.data3$state!="Chandigarh",]

full.data3$caste1<-as.factor(full.data3$caste)
full.data3$state1<-as.factor(full.data3$state)

##change factor levels to be like theirs
full.data3<- within(full.data3, matage_cat <- relevel(matage_cat, ref = "30-34"))
full.data3<- within(full.data3, birth_interval <- relevel(birth_interval, ref = "24<= Months"))
full.data3<- within(full.data3, caste1 <- relevel(caste1, ref = 4))
full.data3<- within(full.data3, state1 <- relevel(state1, ref = "Uttar Pradesh"))


ifb.logit<-zelig(ifb~jsy+matage_cat+nb_cat+birth_interval+mult_birth+mateduc_cat+dlhs3_country_decile+caste1+ religion2+res_cat+ DIST_mean_hh_p_income3+state1, model="logit", 
weights="weights", data=full.data3, save.data=T)

summary(ifb.logit)


#################Odds Ratio coefficients and standard errors: To PUT INTO LATEX TABLE----TO DO LATER
##get OR estimates and standard errors for odds ratios
ifb.mean<-round(exp(coef(ifb.logit)),2)
as.matrix(ifb.mean)
se<-summary(ifb.logit)$coefficients[,2]
se2<-round(exp(coef(ifb.logit))*se,2)

IFB.mean.se<-paste(ifb.mean,"(" ,se2, ")",sep="")

###get the p-values
pvalue.logit<-coefficients(summary(ifb.logit))[,4]
pvalue.logit<-round(pvalue.logit,4)
pvalue.IFB.final<-ifelse(pvalue.logit<0.0001, "<0.0001", pvalue.logit)



##TABLE 3 estimates
x.treat <- setx(ifb.logit, jsy=1)
x.control <- setx(ifb.logit, jsy = 0)
s.out <- sim(ifb.logit, x = x.control, x1 = x.treat, weights=~dwu)
summary(s.out)

ifb.trteffect<-summary(s.out)$qi.stats$fd*100




################################################################################################################################################
##################################################Skilled Birth Attendant outcome#########################################################


full.data3<-subset(full.data2, select=c(ab,jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,dlhs3_country_decile,
caste,religion2,res_cat, DIST_mean_hh_p_income3,state,weights))


#take out goa and uttar pradesh since they dont have those for anc
full.data3<-full.data3[full.data3$state!="Goa",]
full.data3<-full.data3[full.data3$state!="Chandigarh",]

full.data3$caste1<-as.factor(full.data3$caste)
full.data3$state1<-as.factor(full.data3$state)

##change factor levels to be like theirs
full.data3<- within(full.data3, matage_cat <- relevel(matage_cat, ref = "30-34"))
full.data3<- within(full.data3, birth_interval <- relevel(birth_interval, ref = "24<= Months"))
full.data3<- within(full.data3, caste1 <- relevel(caste1, ref = 4))
full.data3<- within(full.data3, state1 <- relevel(state1, ref = "Uttar Pradesh"))


sba.logit<-zelig(ab~jsy+matage_cat+nb_cat+birth_interval+mult_birth+mateduc_cat+dlhs3_country_decile+caste1+ religion2+res_cat+ DIST_mean_hh_p_income3+state1, model="logit", 
weights="weights", data=full.data3, save.data=T)

summary(sba.logit)


#################Odds Ratio coefficients and standard errors: To PUT INTO LATEX TABLE----TO DO LATER
##get OR estimates and standard errors for odds ratios
sba.mean<-round(exp(coef(sba.logit)),2)
as.matrix(sba.mean)
se<-summary(sba.logit)$coefficients[,2]
se2<-round(exp(coef(sba.logit))*se,2)

SBA.mean.se<-paste(sba.mean,"(" ,se2, ")",sep="")

###get the p-values
pvalue.logit<-coefficients(summary(sba.logit))[,4]
pvalue.logit<-round(pvalue.logit,4)
pvalue.SBA.final<-ifelse(pvalue.logit<0.0001, "<0.0001", pvalue.logit)



##TABLE 3 estimates
x.treat <- setx(sba.logit, jsy=1)
x.control <- setx(sba.logit, jsy = 0)
s.out <- sim(sba.logit, x = x.control, x1 = x.treat, weights=~dwu)
summary(s.out)

sba.trteffect<-summary(s.out)$qi.stats$fd*100



################################################################################################################################################
##################################################Neonatal Mortality #########################################################


full.data3<-subset(full.data2, select=c(nnm,jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,dlhs3_country_decile,
caste,religion2,res_cat, DIST_mean_hh_p_income3,state,weights))


#take out goa and uttar pradesh since they dont have those for anc
full.data3<-full.data3[full.data3$state!="Goa",]
full.data3<-full.data3[full.data3$state!="Chandigarh",]

full.data3$caste1<-as.factor(full.data3$caste)
full.data3$state1<-as.factor(full.data3$state)

##change factor levels to be like theirs
full.data3<- within(full.data3, matage_cat <- relevel(matage_cat, ref = "30-34"))
full.data3<- within(full.data3, birth_interval <- relevel(birth_interval, ref = "24<= Months"))
full.data3<- within(full.data3, caste1 <- relevel(caste1, ref = 4))
full.data3<- within(full.data3, state1 <- relevel(state1, ref = "Uttar Pradesh"))


nnm.logit<-zelig(nnm~jsy+matage_cat+nb_cat+birth_interval+mult_birth+mateduc_cat+dlhs3_country_decile+caste1+ religion2+res_cat+ DIST_mean_hh_p_income3+state1, model="logit", 
weights="weights", data=full.data3, save.data=T)

summary(nnm.logit)


#################Odds Ratio coefficients and standard errors: To PUT INTO LATEX TABLE----TO DO LATER
##get OR estimates and standard errors for odds ratios
nnm.mean<-round(exp(coef(nnm.logit)),2)
as.matrix(nnm.mean)
se<-summary(nnm.logit)$coefficients[,2]
se2<-round(exp(coef(nnm.logit))*se,2)

NNM.mean.se<-paste(nnm.mean,"(" ,se2, ")",sep="")

###get the p-values
pvalue.logit<-coefficients(summary(nnm.logit))[,4]
pvalue.logit<-round(pvalue.logit,4)
pvalue.NNM.final<-ifelse(pvalue.logit<0.0001, "<0.0001", pvalue.logit)



##TABLE 3 estimates
x.treat <- setx(nnm.logit, jsy=1)
x.control <- setx(nnm.logit, jsy = 0)
s.out <- sim(nnm.logit, x = x.control, x1 = x.treat, weights=~dwu)
summary(s.out)

##multiply by 10 to get result per 1000 live births
nnm.trteffect<-summary(s.out)$qi.stats$fd*10


################################################################################################################################################
##################################################Perinatal Mortality #########################################################


full.data3<-subset(full.data2, select=c(pnm,jsy,matage_cat,nb_cat,birth_interval,mult_birth,mateduc_cat,dlhs3_country_decile,
caste,religion2,res_cat, DIST_mean_hh_p_income3,state,weights))


#take out goa and uttar pradesh since they dont have those for anc
full.data3<-full.data3[full.data3$state!="Goa",]
full.data3<-full.data3[full.data3$state!="Chandigarh",]

full.data3$caste1<-as.factor(full.data3$caste)
full.data3$state1<-as.factor(full.data3$state)

##change factor levels to be like theirs
full.data3<- within(full.data3, matage_cat <- relevel(matage_cat, ref = "30-34"))
full.data3<- within(full.data3, birth_interval <- relevel(birth_interval, ref = "24<= Months"))
full.data3<- within(full.data3, caste1 <- relevel(caste1, ref = 4))
full.data3<- within(full.data3, state1 <- relevel(state1, ref = "Uttar Pradesh"))


pnm.logit<-zelig(pnm~jsy+matage_cat+nb_cat+birth_interval+mult_birth+mateduc_cat+dlhs3_country_decile+caste1+ religion2+res_cat+ DIST_mean_hh_p_income3+state1, model="logit", 
weights="weights", data=full.data3, save.data=T)

summary(pnm.logit)


#################Odds Ratio coefficients and standard errors: To PUT INTO LATEX TABLE----TO DO LATER
##get OR estimates and standard errors for odds ratios
pnm.mean<-round(exp(coef(pnm.logit)),2)
as.matrix(pnm.mean)
se<-summary(pnm.logit)$coefficients[,2]
se2<-round(exp(coef(pnm.logit))*se,2)

PNM.mean.se<-paste(pnm.mean,"(" ,se2, ")",sep="")

###get the p-values
pvalue.logit<-coefficients(summary(pnm.logit))[,4]
pvalue.logit<-round(pvalue.logit,4)
pvalue.PNM.final<-ifelse(pvalue.logit<0.0001, "<0.0001", pvalue.logit)



##TABLE 3 estimates
x.treat <- setx(pnm.logit, jsy=1)
x.control <- setx(pnm.logit, jsy = 0)
s.out <- sim(pnm.logit, x = x.control, x1 = x.treat, weights=~dwu)
summary(s.out)

pnm.trteffect<-summary(s.out)$qi.stats$fd*10








################################################################################################################################################
############################################---put everything into table----##############################################################

#OR coefficients, standard errors, and pvalues
ANC.mean.se
pvalue.ANC.final

IFB.mean.se
pvalue.IFB.final

SBA.mean.se
pvalue.SBA.final

NNM.mean.se
pvalue.NNM.final

PNM.mean.se
pvalue.PNM.final

ORtable<-as.matrix(cbind(ANC.mean.se, pvalue.ANC.final, IFB.mean.se, pvalue.IFB.final,SBA.mean.se,pvalue.SBA.final,PNM.mean.se,pvalue.PNM.final,
NNM.mean.se,pvalue.NNM.final))
ORtable<-ORtable[-1,]
ORtable<-as.data.frame(ORtable)

colnames(ORtable)<-c("ANC(SE)", "p-value", "IFB(SE)", "p-value", "SBA(SE)", "p-value", "PNM(SE)", "p-value","NNM(SE)", "p-value")

table.OR.natl<-xtable(ORtable, caption="Results of Exact Matching and logistic regression analysis between JSY and health outcome odds ratios at the national level",align=rep("l",11))

print(table.OR.natl)



###treatment effects

trt.effect.national<-rbind(anc.trteffect,ifb.trteffect,sba.trteffect,pnm.trteffect,nnm.trteffect)
trt.effect.national<-round(trt.effect.national,3)

trt.eff2<-as.matrix(paste(trt.effect.national[,1],"%(",trt.effect.national[,3],"-",trt.effect.national[,4],")",sep=""))

trt.effect.AUTHOR<-as.matrix(c("10.7%(9.1-12.3)","43.5%(42.5-44.6)","36.6%(35.6-37.7)","-3.7(-5.2--2.2)","-2.3(-3.7--0.9)"))

trt.effect.national.final<-as.data.frame(cbind(trt.eff2,trt.effect.AUTHOR))

rownames(trt.effect.national.final)<-c("ANC","IFB","SBA","PNM","NNM")
colnames(trt.effect.national.final)<-c("Estimate using R", "Estimate using Stata")
trt.effect.national.final

tableob<-xtable(trt.effect.national.final, caption="Differences in estimated treatment effect between R and Stata methods of first differences",align=c("l","l","l"))

print(tableob)







